*********************************************************
*This file cleans demographic shares from the Census/ACS*
*********************************************************

***Clean the Census data***
* For each census/ACS year: load the person-level extract, restrict the sample,
* build 0/1 demographic indicators, map records to commuting zones through the
* year-specific geography crosswalk, and save weighted czone-level shares.
* Output: one file per year with ipums_pop_YYYY (sum of weights) and
* ipums_<group>_YYYY (weighted mean of each indicator) by czone.
foreach year in 1970 1990 2000 2008 {

    * Load only the records for the current year
    use if year==`year' using "$raw_data_lmarket/ipums_census.dta", clear

    *Drop institutional group quarters*
    quietly: drop if gqtyped>=100 & gqtyped<=499
    *Drop alaska and hawaii*
    quietly: drop if statefip==2 | statefip==15

    *Restrict to the working-age population (ages 25-64)*
    * No employment or class-of-worker filter is applied here.
    keep if age>=25 & age<=64

    *Sex, ethnicity and race*
    gen ipums_female=(sex==2)
    * NOTE(review): any nonzero hispan, including missing or not-reported
    * codes, counts as Hispanic here - confirm this is intended.
    gen ipums_hispanic=(hispan!=0)
    gen ipums_white=(race==1)
    gen ipums_black=(race==2)
    * NOTE(review): race codes above 7 fall in none of the four groups, and
    * code 6 is grouped with other races - verify against the IPUMS codebook.
    gen ipums_asian=(race==4|race==5)
    gen ipums_orace=(race==3 |race==6|race==7)

    * Non-Hispanic versions are derived from the overall indicators so both
    * sets always use the same race codes. Previously asiannh omitted code 5
    * and oracenh used codes 3 and 5 instead of 3, 6 and 7.
    gen ipums_whitenh=(ipums_white==1 & hispan==0)
    gen ipums_blacknh=(ipums_black==1 & hispan==0)
    gen ipums_asiannh=(ipums_asian==1 & hispan==0)
    gen ipums_oracenh=(ipums_orace==1 & hispan==0)

    *Education
    * Four mutually exclusive groups based on detailed education codes.
    * Code 115 is placed in the college group rather than the masters group.
    gen ipums_highschool=(educd<=64)
    gen ipums_somecollege=(educd>64 & educd<100)
    gen ipums_college=((educd>=100 & educd<=109) | educd==115)
    gen ipums_masters=(educd>=110 & educd<=116 & educd!=115)
    * Stops the run if any record falls in zero or several groups
    * (for example educd above 116 or missing).
    assert ipums_college+ ipums_somecollege+ ipums_highschool+ ipums_masters==1

    * Coarser split: high is educd 100-116, low is educd 2-90.
    * Codes 91-99 and codes below 2 are in neither group.
    gen ipums_high=(educd>=100 & educd<=116)
    gen ipums_low=(educd>=2 & educd<=90)

    *Age
    gen ipums_a25_34=(age>=25 & age<=34)
    gen ipums_a35_44=(age>=35 & age<=44)
    gen ipums_a45_54=(age>=45 & age<=54)
    gen ipums_a55_64=(age>=55 & age<=64)

    **Merge czones using geography xwalk**
    * joinby forms all pairwise matches, so a record whose geographic unit
    * maps to several czones is repeated once per czone. unmatched(master)
    * keeps records without a match so the assert below can catch them.
    if `year'==1970 {
        gen ctygrp1970=cntygp97
        joinby ctygrp1970 using "$project/xwalks/xwalks_geography/ctygrp1970_czone.dta", unmatched(master)
        assert czone!=.
    }
    else if `year'==1990 {
        gen puma1990=statefip*10000+puma
        joinby puma1990 using "$project/xwalks/xwalks_geography/puma1990_czone.dta", unmatched(master)
        assert czone!=.
    }
    else if `year'==2000 | `year'==2008 {
        * NOTE(review): 77777 is presumably a combined PUMA code that is absent
        * from the 2000 crosswalk; it is recoded to 1801 - confirm.
        replace puma=1801 if puma==77777
        gen puma2000=statefip*10000+puma
        joinby puma2000 using "$project/xwalks/xwalks_geography/puma2000_czone.dta", unmatched(master)
        assert czone!=.
    }

    **Aggregate at the czone level**
    * afac is not created in this script; presumably it is the allocation
    * factor supplied by the crosswalk - verify. It scales the person weight.
    gen full_wt=afac*perwt
    * rawsum ignores the weight option and returns the plain sum of full_wt
    * as the population count. The means are weighted by full_wt
    * (aweight is what collapse assumes for a bare w).
    collapse (rawsum) ipums_pop=full_wt (mean) ipums_* [aweight=full_wt], by(czone) fast

    * Tag every output variable with the year
    foreach var of varlist ipums_* {
        rename `var' `var'_`year'
    }

    save "$clean_data_lmarket/czone`year'_DemographicShares.dta", replace
}

